
from urllib import request
import re
import sys
import random
import os
import json


def _build_chapter_task(img_urls):
    """Return a dict mapping an image name to its URL for each of *img_urls*.

    The name is the numeric file stem when the URL (ignoring one trailing
    slash) ends in ``/<digits>.jpg``; otherwise it is the 1-based position
    of the URL in *img_urls*, as a string. The stored URL is unmodified.
    """
    chapter_task = {}
    for position, img_url in enumerate(img_urls, start=1):
        # Always start from the current URL; only a single trailing slash is
        # dropped so that '.../12.jpg/' is still recognised as image 12.
        candidate = img_url[:-1] if img_url.endswith('/') else img_url
        match = re.search(r'/(?P<image_name>\d+)\.jpg$', candidate)
        image_name = match.group('image_name') if match else str(position)
        chapter_task[image_name] = img_url
    return chapter_task


def find_chapter_infos(page_url):
    """Index the images of one album page into ``./task.json``.

    Fetches *page_url*, collects every image link found in the HTML and
    records them under ``download_task[comic_name][chapter_name]``, where
    ``comic_name`` is the module-level global and ``chapter_name`` is the
    URL path segment following ``pure-media-``. The task file is rewritten
    on every call, even when the page contains no image links.

    Args:
        page_url: URL of the album page; must contain ``pure-media-<name>``.

    Raises:
        IndexError: if *page_url* does not contain ``pure-media-<name>``.
        urllib.error.URLError: if the page cannot be fetched.
        json.JSONDecodeError: if an existing ``./task.json`` is not valid JSON.
    """
    chapter_name = re.findall(r'pure-media-(?P<chapter_name>[^/]+)', page_url)[0]
    headers = {
        'User-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Mobile Safari/537.36'
    }

    req = request.Request(url=page_url, headers=headers)
    # The context manager closes the connection even if read/decode fails.
    with request.urlopen(req) as response:
        html = response.read().decode('utf-8')

    image_regex = r'<a\shref=\"(?P<img_url>[^\"]+)\"\sclass=\"item\"\srel=\"images\"\sdata-fancybox-type=\"image\">'
    img_urls = re.findall(image_regex, html)

    try:
        with open('./task.json', 'r', encoding='UTF8') as task_file:
            download_task = dict(json.load(task_file))
    except FileNotFoundError:
        # First run: no task file yet, so start from an empty task map.
        download_task = {}

    # Also replaces an explicit null entry, not just a missing key.
    if download_task.get(comic_name) is None:
        download_task[comic_name] = {}
    if img_urls:
        download_task[comic_name][chapter_name] = _build_chapter_task(img_urls)

    with open('./task.json', 'w', encoding='UTF8') as task_file:
        task_file.write(json.dumps(download_task, ensure_ascii=False))
    print('Finished index chapter:\t' + chapter_name)


# Top-level key in task.json under which find_chapter_infos() stores every
# chapter it indexes; the function reads this name as a module global.
comic_name = 'pure-media'
# NOTE(review): not read by any code visible in this file --
# find_chapter_infos() derives its own local chapter_name from the URL.
chapter_name = 'vol-210-yeha'

# Alternative album URL, currently disabled:
# page_url = 'https://www.xasiat.com/albums/13127/pure-media-vol-269-yeha-visit-suspicious-hospital-201p-290mb/'
# NOTE(review): this value is never used; the loop below rebinds page_url
# to each entry of page_urls in turn.
page_url = 'https://www.xasiat.com/albums/4714/pure-media-vol-210-yeha/'

# Album pages to index in this run.
page_urls = [
    'https://www.xasiat.com/albums/24684/pure-media-vol-292-yeha-hina/',
    'https://www.xasiat.com/albums/19737/pure-media-vol-296-yeha/',
    'https://www.xasiat.com/albums/17198/pure-media-vol-287-yeha/',
    'https://www.xasiat.com/albums/15298/pure-media-vol-282-yeha-in-the-mood-for-love/',
    'https://www.xasiat.com/albums/14317/pure-media-vol-273-yeha-dreaming-with-library-girl/'
]

# Runs at module level, so it executes on import as well as when the file is
# run directly: each call fetches one album page and rewrites ./task.json.
for page_url in page_urls:
    find_chapter_infos(page_url)
